# Read the merged dataset; the first row of the CSV supplies the column names.
df <- read.csv(file = "merge-new-version.csv", header = TRUE, sep = ",")
# Disabled filter. As written it would keep only the rows that contain at
# least one NA (note the `!`), not the complete ones.
#df <- df[!complete.cases(df), ]
# Show the raw data frame.
df
# Log-transform both outcomes; adding 1 keeps zero values finite (log(1) = 0).
df$ln_novelty <- log(1 + df$novelty)
df$ln_total <- log(1 + df$total)
# Store the group code as a categorical variable.
df$group <- factor(df$group)
# Show the data frame with the derived columns.
df
# Make group "3" the reference level so every group coefficient is a
# contrast against it.
df$group <- relevel(df$group, ref = "3")
# Unadjusted model: log total as a function of group only.
mod <- lm(formula = ln_total ~ factor(group), data = df)
summary(mod)

Call:
lm(formula = ln_total ~ factor(group), data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.7373 -0.2178  0.3298  0.8334  1.7253 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)      5.1441     0.1169  44.016  < 2e-16 ***
factor(group)0  -1.0003     0.1642  -6.093 1.94e-09 ***
factor(group)1  -0.4069     0.1612  -2.524 0.011849 *  
factor(group)2  -0.5990     0.1603  -3.737 0.000203 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.422 on 628 degrees of freedom
Multiple R-squared:  0.05796,   Adjusted R-squared:  0.05346 
F-statistic: 12.88 on 3 and 628 DF,  p-value: 3.561e-08
# Keep group "3" as the reference level (a no-op when it is already first,
# but it lets this chunk be run on its own).
df$group <- relevel(df$group, ref = "3")
# Unadjusted model: log novelty as a function of group only.
mod <- lm(formula = ln_novelty ~ factor(group), data = df)
summary(mod)

Call:
lm(formula = ln_novelty ~ factor(group), data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.52892 -0.13345  0.06826  0.15783  0.28789 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)     0.52892    0.01759  30.074  < 2e-16 ***
factor(group)0 -0.12226    0.02471  -4.948 9.64e-07 ***
factor(group)1 -0.12367    0.02426  -5.098 4.55e-07 ***
factor(group)2 -0.05178    0.02412  -2.147   0.0322 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.214 on 628 degrees of freedom
Multiple R-squared:  0.05431,   Adjusted R-squared:  0.04979 
F-statistic: 12.02 on 3 and 628 DF,  p-value: 1.163e-07
# Keep group "3" as the reference level (a no-op when it is already first).
df$group <- relevel(df$group, ref = "3")
# Adjusted novelty model: group plus the questionnaire items and `count`.
# lm() drops rows with missing values in any model variable.
mod <- lm(
  formula = ln_novelty ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 +
    Q8_Q8_1 + Q10 + count,
  data = df
)
summary(mod)

Call:
lm(formula = ln_novelty ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + 
    Q8_Q8_1 + Q10 + count, data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.73108 -0.10789  0.05269  0.14730  0.30517 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)     0.412100   0.035171  11.717  < 2e-16 ***
factor(group)0 -0.113961   0.024192  -4.711 3.06e-06 ***
factor(group)1 -0.116408   0.023889  -4.873 1.40e-06 ***
factor(group)2 -0.051286   0.023555  -2.177  0.02984 *  
Q7_Q7_1        -0.020611   0.006956  -2.963  0.00316 ** 
Q7_Q7_2         0.028904   0.007075   4.085 4.99e-05 ***
Q8_Q8_1         0.008860   0.007319   1.210  0.22656    
Q10             0.007122   0.010748   0.663  0.50783    
count           0.013293   0.002829   4.699 3.23e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2067 on 611 degrees of freedom
  (12 observations deleted due to missingness)
Multiple R-squared:  0.1234,    Adjusted R-squared:  0.112 
F-statistic: 10.75 on 8 and 611 DF,  p-value: 3.249e-14
# Keep group "3" as the reference level (a no-op when it is already first).
df$group <- relevel(df$group, ref = "3")
# Extended novelty model: the covariate model `mod` plus a phase factor.
# factor(phase) is what distinguishes mod1 from mod; without it,
# anova(mod, mod1) would compare two identical fits and test nothing.
mod1 <- lm(
  formula = ln_novelty ~ factor(group) + factor(phase) + Q7_Q7_1 + Q7_Q7_2 +
    Q8_Q8_1 + Q10 + count,
  data = df
)
summary(mod1)

Call:
lm(formula = ln_novelty ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + 
    Q8_Q8_1 + Q10 + count, data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.73108 -0.10789  0.05269  0.14730  0.30517 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)     0.412100   0.035171  11.717  < 2e-16 ***
factor(group)0 -0.113961   0.024192  -4.711 3.06e-06 ***
factor(group)1 -0.116408   0.023889  -4.873 1.40e-06 ***
factor(group)2 -0.051286   0.023555  -2.177  0.02984 *  
Q7_Q7_1        -0.020611   0.006956  -2.963  0.00316 ** 
Q7_Q7_2         0.028904   0.007075   4.085 4.99e-05 ***
Q8_Q8_1         0.008860   0.007319   1.210  0.22656    
Q10             0.007122   0.010748   0.663  0.50783    
count           0.013293   0.002829   4.699 3.23e-06 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.2067 on 611 degrees of freedom
  (12 observations deleted due to missingness)
Multiple R-squared:  0.1234,    Adjusted R-squared:  0.112 
F-statistic: 10.75 on 8 and 611 DF,  p-value: 3.249e-14
# F-test comparing the two fitted linear models `mod` and `mod1`.
# NOTE(review): the recorded table lists factor(phase) in Model 2 only;
# confirm that mod1 actually includes that term before relying on this test.
anova(object = mod, mod1)
Analysis of Variance Table

Model 1: ln_novelty ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + Q10 + 
    count
Model 2: ln_novelty ~ factor(group) + factor(phase) + Q7_Q7_1 + Q7_Q7_2 + 
    Q8_Q8_1 + Q10 + count
  Res.Df    RSS Df Sum of Sq      F Pr(>F)
1    611 26.099                           
2    608 25.916  3   0.18332 1.4336 0.2319
library(lmerTest)
# Mixed model for log novelty: group as a fixed effect and a random
# intercept for each phase. REML = FALSE requests a maximum-likelihood fit.
fit.lmer <- lmer(
  formula = ln_novelty ~ factor(group) + (1 | phase),
  data = df,
  REML = FALSE
)
# Print the fitted model.
fit.lmer
Linear mixed model fit by maximum likelihood  ['lmerModLmerTest']
Formula: ln_novelty ~ factor(group) + (1 | phase)
   Data: df
      AIC       BIC    logLik  deviance  df.resid 
-147.5364 -120.8431   79.7682 -159.5364       626 
Random effects:
 Groups   Name        Std.Dev.
 phase    (Intercept) 0.005858
 Residual             0.213203
Number of obs: 632, groups:  phase, 4
Fixed Effects:
   (Intercept)  factor(group)0  factor(group)1  factor(group)2  
       0.52892        -0.12226        -0.12367        -0.05178  
# Five-number summary plus mean of log novelty, computed within each group.
with(df, tapply(ln_novelty, group, summary))
$`3`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.4842  0.5588  0.5289  0.6162  0.6894 

$`0`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.0000  0.5235  0.4067  0.6084  0.6858 

$`1`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.1777  0.5062  0.4053  0.6182  0.6931 

$`2`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.0000  0.3871  0.5465  0.4771  0.6084  0.6904 
# Five-number summary plus mean of log total, computed within each group.
with(df, tapply(ln_total, group, summary))
$`3`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  4.331   4.761   5.079   5.144   5.515   5.891 

$`0`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   4.106   4.836   4.144   5.337   5.869 

$`1`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   4.553   5.089   4.737   5.580   5.882 

$`2`
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.000   4.615   4.925   4.545   5.450   5.884 
library(vtree)
# Variable tree showing how the rows split across groups.
vtree(df, "group")
# Variable tree splitting by phase first and then by group, drawn vertically
# with a distinct fill colour for each variable.
vtree(
  df,
  c("phase", "group"),
  fillcolor = c(phase = "#e7d4e8", group = "#99d8c9"),
  horiz = FALSE
)
# Keep group "3" as the reference level (a no-op when it is already first).
df$group <- relevel(df$group, ref = "3")
# Adjusted model for log total: group plus the questionnaire items and
# `count`. This reuses (and overwrites) the name `mod`.
mod <- lm(
  formula = ln_total ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 +
    Q8_Q8_1 + Q10 + count,
  data = df
)
summary(mod)

Call:
lm(formula = ln_total ~ factor(group) + Q7_Q7_1 + Q7_Q7_2 + Q8_Q8_1 + 
    Q10 + count, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.6309 -0.2310  0.3346  0.7764  1.9667 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)     4.82832    0.22926  21.060  < 2e-16 ***
factor(group)0 -0.98353    0.15769  -6.237 8.33e-10 ***
factor(group)1 -0.42360    0.15572  -2.720 0.006709 ** 
factor(group)2 -0.59841    0.15354  -3.897 0.000108 ***
Q7_Q7_1        -0.19585    0.04534  -4.319 1.83e-05 ***
Q7_Q7_2         0.19627    0.04612   4.256 2.41e-05 ***
Q8_Q8_1        -0.10504    0.04771  -2.202 0.028060 *  
Q10             0.17920    0.07006   2.558 0.010776 *  
count           0.12749    0.01844   6.914 1.19e-11 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.347 on 611 degrees of freedom
  (12 observations deleted due to missingness)
Multiple R-squared:  0.1768,    Adjusted R-squared:  0.166 
F-statistic:  16.4 on 8 and 611 DF,  p-value: < 2.2e-16
# Interaction plot: mean log total by group, one trace per phase, with the
# y-axis anchored at zero.
with(
  df,
  interaction.plot(
    x.factor = group,
    trace.factor = phase,
    response = ln_total,
    ylim = c(0, max(ln_total))
  )
)

# Same plot for log novelty.
with(
  df,
  interaction.plot(
    x.factor = group,
    trace.factor = phase,
    response = ln_novelty,
    ylim = c(0, max(ln_novelty))
  )
)

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQpkZiA8LSByZWFkLmNzdigibWVyZ2UtbmV3LXZlcnNpb24uY3N2IiwgaGVhZGVyID1UUlVFLCBzZXA9IiwiKQojZGYgPC0gZGZbIWNvbXBsZXRlLmNhc2VzKGRmKSwgXSAgIApkZgpgYGAKCgpgYGB7cn0KZGYkbG5fbm92ZWx0eSA8LSBsb2coZGYkbm92ZWx0eSsxKQpkZiRsbl90b3RhbCA8LSBsb2coZGYkdG90YWwrMSkgCmRmJGdyb3VwID0gZmFjdG9yKGRmJGdyb3VwKQpkZgpgYGAKCmBgYHtyfQpkZiRncm91cCA8LSByZWxldmVsKGRmJGdyb3VwLCByZWYgPSAiMyIpCm1vZCA8LSBsbShsbl90b3RhbCB+IGZhY3Rvcihncm91cCksIGRhdGE9ZGYpCnN1bW1hcnkobW9kKQpgYGAKCmBgYHtyfQpkZiRncm91cCA8LSByZWxldmVsKGRmJGdyb3VwLCByZWYgPSAiMyIpCm1vZCA8LSBsbShsbl9ub3ZlbHR5IH4gZmFjdG9yKGdyb3VwKSwgZGF0YT1kZikKc3VtbWFyeShtb2QpCmBgYAoKCmBgYHtyfQpkZiRncm91cCA8LSByZWxldmVsKGRmJGdyb3VwLCByZWYgPSAiMyIpCm1vZCA8LSBsbShsbl9ub3ZlbHR5IH4gZmFjdG9yKGdyb3VwKSArIFE3X1E3XzEgKyBRN19RN18yICsgUThfUThfMSArIFExMCArIGNvdW50LCBkYXRhPWRmKQpzdW1tYXJ5KG1vZCkKYGBgCgpgYGB7cn0KZGYkZ3JvdXAgPC0gcmVsZXZlbChkZiRncm91cCwgcmVmID0gIjMiKQptb2QxIDwtIGxtKGxuX25vdmVsdHkgfiBmYWN0b3IoZ3JvdXApICsgUTdfUTdfMSArIFE3X1E3XzIgKyBROF9ROF8xICsgUTEwICsgY291bnQsIGRhdGE9ZGYpCnN1bW1hcnkobW9kMSkKYGBgCgpgYGB7cn0KYW5vdmEobW9kLCBtb2QxKQpgYGAKCgpgYGB7cn0KbGlicmFyeShsbWVyVGVzdCkKZml0LmxtZXIgPC0gbG1lcihsbl9ub3ZlbHR5IH4gZmFjdG9yKGdyb3VwKSArICggMSB8IHBoYXNlKSwgZGF0YSA9IGRmLCBSRU1MPSBGQUxTRSkKZml0LmxtZXIKYGBgCgpgYGB7cn0KdGFwcGx5KGRmJGxuX25vdmVsdHksIGRmJGdyb3VwLCBzdW1tYXJ5KQpgYGAKCmBgYHtyfQp0YXBwbHkoZGYkbG5fdG90YWwsIGRmJGdyb3VwLCBzdW1tYXJ5KQpgYGAKYGBge3J9CmxpYnJhcnkodnRyZWUpCnZ0cmVlKGRmLCAiZ3JvdXAiKQpgYGAKYGBge3J9CnZ0cmVlKGRmLCBjKCJwaGFzZSIsICJncm91cCIpLCAKICAgZmlsbGNvbG9yID0gYyggcGhhc2UgPSAiI2U3ZDRlOCIsIGdyb3VwID0gIiM5OWQ4YzkiKSwKICAgaG9yaXogPSBGQUxTRSkKYGBgCgoKYGBge3J9CmRmJGdyb3VwIDwtIHJlbGV2ZWwoZGYkZ3JvdXAsIHJlZiA9ICIzIikKbW9kIDwtIGxtKGxuX3RvdGFsIH4gZmFjdG9yKGdyb3VwKSArIFE3X1E3XzEgKyBRN19RN18yICsgUThfUThfMSArIFExMCArIGNvdW50LCBkYXRhPWRmKQpzdW1tYXJ5KG1vZCkKYGBgCmBgYHtyfQp3aXRoKGRmLCBpbnRlcmFjdGlvbi5wbG90KGdyb3VwLCBwaGFzZSwgbG5fdG90YWwsIHlsaW09YygwLCBtYXgobG5fdG90YWwpKSkpICMgaW50ZXJhY3Rpb24gcGxvdApgYGAK
CmBgYHtyfQp3aXRoKGRmLCBpbnRlcmFjdGlvbi5wbG90KGdyb3VwLCBwaGFzZSwgbG5fbm92ZWx0eSwgeWxpbT1jKDAsIG1heChsbl9ub3ZlbHR5KSkpKSAjIGludGVyYWN0aW9uIHBsb3QKYGBgCgo=